###############################################################################
# Plot 2
###############################################################################
# This Script contains the code to build the second figure
###############################################################################
# Content
###############################################################################
# 1) Dependencies
# 2) Load Data
# 3) Aggregation for Fig. 2
# 4) Fig. 2
###############################################################################
# 1) Dependencies
###############################################################################
library(readr)
library(dplyr)
library(ggplot2)
library(gganimate)
library(ggeffects)
library(ggExtra)
library(ggridges)
library(ggrepel)
library(ggpubr)
library(grid)
library(scales)
library(lubridate)
library(extrafont)
library(reshape2)
library(here)
library(ggforce)
library(png)
library(readxl)
library(grid)
library(gridExtra)
library(ggpubr)
library(ggalt)
library(stringr)
###############################################################################
# 2) Load Data
###############################################################################
# Set Path
# Every relative path used below ("ggplot_theme_ddl.R", "../data", "../support",
# "../img_main") is resolved from the working directory. Inside RStudio the
# working directory is switched to the folder of the active document. Outside
# RStudio (e.g. Rscript) the rstudioapi call would raise an error, so the
# current working directory is kept and the script has to be launched from its
# own folder.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
} else {
  message("RStudio is not available; keeping working directory: ", getwd())
}
# Remove every object from the global environment before the analysis starts
# (attached packages are not affected by rm()).
rm(list = ls())

# Custom functions
# Discrete x scale whose tick labels are shown without the separator `sep` and
# everything that follows it (the suffix that reorder_within() appends).
#   ...  further arguments handed to ggplot2::scale_x_discrete()
#   sep  separator that marks the start of the suffix to strip
scale_x_reordered <- function(..., sep = "___") {
  suffix_pattern <- paste0(sep, ".+$")
  strip_suffix <- function(x) gsub(suffix_pattern, "", x)
  ggplot2::scale_x_discrete(labels = strip_suffix, ...)
}
# ggplot order over facets...
# Builds a factor that can be ordered independently inside each facet.
#   x       values to be ordered
#   by      numeric vector the ordering is based on
#   within  facet/group of each value; pasted onto `x` so that equal values in
#           different groups become distinct factor levels
#   fun     summary applied to `by` per level (default: mean)
#   sep     separator placed between `x` and `within`
#   ...     forwarded to stats::reorder(), e.g. extra arguments for `fun` such
#           as na.rm = TRUE (previously these were accepted but silently dropped)
# Returns the factor produced by stats::reorder(); its levels have the form
# "<x><sep><within>" and are sorted by the summarised `by` values.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun, ...)
}

# Source the project's ggplot theme file (presumably the place where
# ddl_theme(), used for the figure below, is defined); warnings raised while
# sourcing are suppressed.
suppressWarnings(source("ggplot_theme_ddl.R", encoding = "UTF-8"))

# Load Data
df <- readRDS("../data/smd_ner_2015_2019_combined.RDS")

# Reads one candidate list as a tibble whose `id` column is character.
read_candidate_list <- function(csv_path) {
  candidates <- read.csv(csv_path, stringsAsFactors = FALSE)
  candidates <- as_tibble(candidates)
  dplyr::mutate(candidates, id = as.character(id))
}
candidates_list_15 <- read_candidate_list("../support/candidates-2015/00-Named_Entity_List_withID.csv")
candidates_list_19 <- read_candidate_list("../support/candidates-2019/00-Named_Entity_List_withID.csv")

# Collapse the 2019 `council` labels to "sr", "nr" or "sr & nr"; any other
# label becomes NA. Every whitespace character is replaced by a plain space
# before matching.
# NOTE(review): the repeated labels inside each c() look identical here;
# possibly the variants once differed in their whitespace character - confirm.
candidates_list_19 <- candidates_list_19 %>%
  dplyr::mutate(
    candidacy = as.character(gsub("\\s", " ", council)),
    council = case_when(
      candidacy %in% c("SR", "Former Staenderat", "Former Staenderat") ~ "sr",
      candidacy %in% c("NR", "Former Nationalrat", "Former Nationalrat") ~ "nr",
      candidacy %in% c("SR und NR", "NR und SR") ~ "sr & nr"
    )
  ) %>%
  dplyr::select(-c(candidacy))

# Year as text; date reduced to its month-day part.
df$year <- as.character(df$year)
parsed_dates <- as.Date(df$date, "%m-%d")
df$date <- format(parsed_dates, format = "%m-%d")

# Manual corrections: harmonise two name spellings, then flag Philipp Müller
# as incumbent.
name_fixes <- c("Adèle Goumaz" = "Adèle Thorens Goumaz",
                "Niklaus-Samuel Gugger" = "Nik Gugger")
for (old_name in names(name_fixes)) {
  df$fullname <- ifelse(df$fullname == old_name, name_fixes[[old_name]], df$fullname)
}
df$incumbent <- ifelse(df$fullname == "Philipp Müller", 1, df$incumbent)

# Switches
# Remove Federal Councilors
council <- TRUE
# Remove Party Presidents
president <- TRUE

# Council members 2015
council_15 <- c("Ueli Maurer", "Alain Berset", "Didier Burkhalter",
                "Simonetta Sommaruga", "Eveline Widmer Schlumpf",
                "Johann Schneider-Ammann", "Doris Leuthard")

# Council members 2019
council_19 <- c("Ueli Maurer", "Alain Berset", "Ignazio Cassis",
                "Simonetta Sommaruga", "Guy Parmelin",
                "Karin Keller-Sutter", "Viola Amherd")

# Party Presidents 2015
presi_15 <- c("Toni Brunner", "Christian Levrat", "Philipp Müller",
              "Christophe Darbellay", "Regula Rytz", "Martin Bäumle", "Martin Landolt")

# Party Presidents 2019
presi_19 <- c("Albert Rösti", "Christian Levrat", "Petra Gössi",
              "Gerhard Pfister", "Regula Rytz", "Jürg Grossen", "Martin Landolt")

# Keeps the 2015 rows whose fullname is not in `names_2015` and the 2019 rows
# whose fullname is not in `names_2019`; rows of any other year are dropped.
drop_listed_people <- function(data, names_2015, names_2019) {
  dplyr::filter(
    data,
    (year == "2015" & !fullname %in% names_2015) |
      (year == "2019" & !fullname %in% names_2019)
  )
}

# Remove Council Members:
if (council == TRUE) {
  df <- drop_listed_people(df, council_15, council_19)
}
# Remove Party Presidents:
if (president == TRUE) {
  df <- drop_listed_people(df, presi_15, presi_19)
}
###############################################################################
# 3) Aggregation for Fig. 2
###############################################################################
names(df)

# Hard-coded candidate counts per election year and incumbency status; rows
# matching none of the four cases (e.g. missing incumbent) get NA.
# (A former blanket per-year assignment was dropped: this mutate overwrote the
# whole column anyway.)
df <- df %>%
  dplyr::mutate(n_candidates_on_list = case_when(
    year == "2019" & incumbent == 1 ~ 186,
    year == "2019" & incumbent == 0 ~ 3599,
    year == "2015" & incumbent == 1 ~ 200,
    year == "2015" & incumbent == 0 ~ 2888
  ))

# Number of rows (mentions) per person and year; rows without a name are dropped.
agg1 <- df %>%
  dplyr::group_by(year, incumbent, gender, n_candidates_on_list, fullname, person.id) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(!is.na(fullname))

# Keep only the listed candidates that never occur in the mention data of the
# respective year ...
helper <- filter(agg1, year == "2015")
candidates_list_15 <- candidates_list_15 %>% dplyr::filter(!id %in% helper$person.id)

helper <- filter(agg1, year == "2019")
candidates_list_19 <- candidates_list_19 %>% dplyr::filter(!id %in% helper$person.id)

# ... and give them the same columns as the aggregate, with zero mentions.
candidates_list_15 <- candidates_list_15 %>%
  dplyr::select(c(fullname, gender, incumbent, id)) %>%
  dplyr::mutate(person.id = id,
                n = 0,
                year = "2015") %>%
  dplyr::select(-c("id"))

candidates_list_19 <- candidates_list_19 %>%
  dplyr::select(c(fullname, gender, incumbent, id)) %>%
  dplyr::mutate(person.id = id,
                n = 0,
                year = "2019") %>%
  dplyr::select(-c("id"))

# Missing list places are set to 1 for council values "sr", "" and "nr" and
# stay NA otherwise; places 1-5 are labelled "Top Place".
# NOTE(review): `council` is also the name of a global logical flag in this
# script; inside mutate() a data column of that name takes precedence -
# confirm that df really carries a `council` column.
df <- df %>%
  dplyr::mutate(list_place_1 = ifelse(!is.na(list_place_1), list_place_1,
                               ifelse(council %in% c("sr", ""), 1,
                               ifelse(council == "nr", 1, NA)))) %>%
  dplyr::mutate(list_place_top = ifelse(list_place_1 <= 5, "Top Place", "Bottom Place"))

# Average topic share per year and gender:
#  1. count mentions per person and topic, dropping the non-substantive classes;
#  2. per person: share of each topic among the person's mentions (freq) and
#     its standard error sqrt(freq * (1 - freq) / n_tot);
#  3. add the topics a person was never mentioned with (freq = 0, se = 0);
#     tidyr::complete() also creates year/gender combinations a person never
#     had, which are removed again because their n_tot is NA throughout;
#  4. average the shares over persons and propagate the standard errors.
agg2 <- df %>%
  dplyr::group_by(year, selectsclass, gender, fullname) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(!is.na(fullname)) %>%
  dplyr::filter(!selectsclass %in% c('Other_unclassified_Political_Texts', 'NotPolitical', 'Not Classified', 'Other_Problems', 'Regions_NationalCohesion')) %>%
  ungroup %>%
  dplyr::group_by(year, gender, fullname) %>%
  dplyr::mutate(n_tot = sum(n)) %>%
  ungroup %>%
  dplyr::mutate(freq = n / n_tot, se = sqrt((freq * (1 - freq)) / n_tot)) %>%
  # tidyr is not attached by this script, so complete() is called with its namespace
  tidyr::complete(selectsclass, fullname, year, gender, fill = list(freq = 0, se = 0)) %>%
  dplyr::group_by(year, gender, fullname) %>%
  dplyr::filter(any(!is.na(n_tot))) %>%
  dplyr::mutate(freq = ifelse(is.na(freq), 0, freq)) %>%
  dplyr::group_by(year, gender, selectsclass) %>%
  dplyr::summarise(mean_freq = mean(freq), se = sqrt(sum(se^2)) / length(se))

###############################################################################
# 4) Fig. 2
###############################################################################
# Recode the raw topic classes to English display labels.
# The labels are matched to the raw classes purely by position: the i-th label
# belongs to the i-th raw class in sorted order. The length check stops the
# script if the data contain a different number of classes, because the labels
# would otherwise be shifted onto the wrong topics without any warning.
vec <- sort(unique(agg2$selectsclass))
topic_labels <- c('Agriculture', 'Economy', 'Education & Culture', 'Environment', 'Europe',
                  'Finance & Taxes', 'Gender', 'Immigration',
                  'International Relations',
                  'Labour Market', 'Law & Order', 'Political System', 'Public Health',
                  'Services & Infrastructure',
                  'Social Security')
stopifnot(length(vec) == length(topic_labels))
.recodr <- setNames(as.list(topic_labels), vec)

# Same mapping under its second name (both lists have always been identical).
.recodrnobreaks <- .recodr

agg2 <- agg2 %>% dplyr::mutate(selectsclass = dplyr::recode(selectsclass, !!!.recodr),
                               selectsclass = factor(selectsclass))

# One row per year and topic with columns mean_freq_<gender> and se_<gender>.
# If no error bars change values_from to values_from = mean_freq then the mean_freq_f and mean_freq_m are just m and f
agg2_wide <- agg2 %>% tidyr::pivot_wider(names_from = gender, values_from = c(mean_freq, se))

# "Political System" is not shown in the figure.
agg2_wide <- agg2_wide %>% dplyr::filter(selectsclass != "Political System")

# Female minus male share in percentage points, rounded to two decimals.
# Here only m and f if no errorbars
agg2_wide$diff <- round(((agg2_wide$mean_freq_f - agg2_wide$mean_freq_m) * 100), digits = 2)

# x positions of the "Difference" column (the ifelse() allows per-year values).
# NOTE(review): box_min equals box_max and both branches of each ifelse() are
# the same, so a rectangle drawn from box_min to box_max has zero width -
# confirm that this is intended.
agg2_wide$box_min <- ifelse(agg2_wide$year == "2015", .10, .10)
agg2_wide$box_max <- ifelse(agg2_wide$year == "2015", .10, .10)
agg2_wide$box_center <- ifelse(agg2_wide$year == "2015", .09, .09)

# Label helper: turns proportions into percentage text - the value times 100,
# rounded to one decimal place, followed by a space and a percent sign
# (e.g. 0.1234 becomes "12.3 %").
percent_first <- function(x) {
  paste(round(100 * x, digits = 1), "%")
}
# Topic order for the y axis; the vector is reversed when used as factor
# levels, so 'Gender' becomes the last level.
topic_order <- c('Gender', 'Immigration', 'Social Security', 'Public Health',
                 'Labour Market', 'Education & Culture', 'International Relations',
                 'Agriculture', 'Europe', 'Law & Order', 'Environment',
                 'Services & Infrastructure', 'Economy', 'Finance & Taxes')
agg2_wide$selectsclass <- factor(agg2_wide$selectsclass, levels = rev(topic_order))

# Create a named vector for shapes
shapes <- c("Female Mentions" = 15, "Male Mentions" = 17)

# Colours for the female and male series, and the 'Gender' rows that anchor
# the column headings.
col_female <- "#DD2461"
col_male <- "#009E73"
gender_row <- filter(agg2_wide, selectsclass == "Gender")

# Text sizes, legend placement, axis lines and spacing applied on top of ddl_theme().
fig2_theme <- theme(
  legend.position = "bottom",
  legend.direction = "horizontal",
  strip.background = element_blank(),
  strip.text = element_text(color = "black"),
  axis.text.x = element_text(angle = 0, hjust = .5, vjust = 1, size = 20),
  axis.text.y = element_text(hjust = 0, size = 20),
  strip.text.x = element_text(size = 20),
  axis.title = element_text(size = 20),
  plot.title = element_text(size = 24),
  legend.text = element_text(size = 20),
  plot.margin = unit(c(.5, 1.3, .5, .5), "cm"),
  legend.key.size = unit(1.5, "line"),
  axis.line.x = element_line(color = "black", size = .5),
  axis.line.y = element_line(color = "black", size = .5),
  panel.spacing.x = unit(2.5, "lines")
)

# Dumbbell chart with one facet per year: per topic the female and male mean
# shares, their 1.96 * se error bars, value labels and the difference column.
# Same here: if no error bars then m and f instead of mean_freq_f and mean_freq_m.
f4_dumbell <- ggplot(agg2_wide) +
  # points and connecting segment
  geom_point(aes(y = selectsclass, x = mean_freq_f, shape = "Female Mentions"),
             color = col_female, size = 3) +
  geom_point(aes(y = selectsclass, x = mean_freq_m, shape = "Male Mentions"),
             color = col_male, size = 3) +
  geom_dumbbell(aes(y = selectsclass, x = mean_freq_f, xend = mean_freq_m),
                size = 1.75, color = "grey",
                colour_x = col_female, colour_xend = col_male) +
  # error bars: mean +/- 1.96 * se
  geom_errorbar(aes(y = selectsclass,
                    xmin = mean_freq_f - 1.96 * se_f,
                    xmax = mean_freq_f + 1.96 * se_f,
                    colour = "Female"),
                width = 0.2) +
  geom_errorbar(aes(y = selectsclass,
                    xmin = mean_freq_m - 1.96 * se_m,
                    xmax = mean_freq_m + 1.96 * se_m,
                    colour = "Male"),
                width = 0.2) +
  # value labels: female above, male below the points
  geom_text(data = agg2_wide,
            aes(x = mean_freq_f, y = selectsclass, label = percent_first(mean_freq_f)),
            size = 4.5, vjust = -1.5, colour = col_female) +
  geom_text(data = agg2_wide,
            aes(x = mean_freq_m, y = selectsclass, label = percent_first(mean_freq_m)),
            size = 4.5, vjust = 2.1, colour = col_male) +
  # series headings, anchored at the 'Gender' row
  geom_text(data = gender_row,
            aes(x = .02, y = selectsclass, label = "Female Mentions"),
            color = col_female, size = 5.5, vjust = -3, fontface = "bold") +
  geom_text(data = gender_row,
            aes(x = .06, y = selectsclass, label = "Male Mentions"),
            color = col_male, size = 5.5, vjust = -3, fontface = "bold") +
  # difference column: background, values and heading
  geom_rect(data = agg2_wide,
            aes(xmin = box_min, xmax = box_max, ymin = -Inf, ymax = Inf),
            fill = "#efefe3") +
  geom_text(data = agg2_wide,
            aes(label = diff, y = selectsclass, x = box_center),
            fontface = "bold", size = 4.5) +
  geom_text(data = gender_row,
            aes(x = box_center, y = selectsclass, label = "Difference:"),
            color = "#7a7d7e", size = 5.5, vjust = -3, fontface = "bold") +
  # scales, theme, facets and legends
  scale_x_continuous(labels = scales::percent_format(accuracy = .1),
                     expand = c(0, 0),
                     limits = c(-0.005, .10),
                     breaks = c(0, 0.025, 0.05, 0.075)) +
  scale_y_discrete(expand = expansion(add = c(1, 1))) +
  ddl_theme() +
  facet_wrap(~year, ncol = 3) +
  # no plot title is set
  labs(y = '', x = 'Share [%]', color = "", shape = "") +
  scale_shape_manual(values = shapes) +
  scale_color_manual(name = "",
                     values = c("Female" = col_female, "Male" = col_male),
                     labels = c("Female Mentions", "Male Mentions")) +
  fig2_theme

f4_dumbell

# Write the figure to disk.
ggsave(plot = f4_dumbell, filename = '../img_main/figure_2.png',
       width = 16, height = 12, dpi = 300, bg = "white")
